#delimit ;
* Session setup: from here on every command, including comments, ends at a semicolon ;
* Empty the data in memory, move into the replication folder, and switch off output paging ;
clear ;
cd "replication" ;
set more off ;

* *************************************************************************** ;
* merge baseline, midline, and endline together
* *************************************************************************** ;
* Baseline respondents are the master sample, and midline is attached by respondent Id ;
use "./data/baseline.dta", clear ;
merge 1:1 Id using "./data/midline.dta" ;

* No midline record may lack a baseline match, and wave must be known for every row
* because the wave-restricted indicator below conditions on it ;
assert _merge != 2 ;
assert !missing(wave) ;

* Midline response indicators. The not_wave1 version is left missing for wave 1
* so that regressions on it drop wave 1 automatically ;
generate has_nonmissing_midline = (_merge == 3) ;
generate has_missing_midline = (_merge == 1) ;
generate has_missing_midline_not_wave1 = has_missing_midline if wave != 1 ;
drop _merge ;

* Same procedure for the endline survey ;
merge 1:1 Id using "./data/endline.dta" ;
assert _merge != 2 ;
generate has_missing_endline = (_merge == 1) ;
drop _merge ;

* *************************************************************************** ;
* generate baseline health index and interactions  
* *************************************************************************** ;

* Baseline health index: the sum of five baseline health variables divided by five.
* A respondent with any of the five components missing gets a missing index ;
generate bl_health_index =
	(aids_curable
	+ nightblindness_curable
	+ wash_defecation
	+ water_boil_filter
	+ water_tap) / 5 ;

* Interact each treatment dummy with the index ;
foreach arm in healthonly healthandpay { ;
	generate `arm'Xbl_health_index = `arm' * bl_health_index ;
} ;

****************************************************************************** ;
* generate randomization strata and wave-class variable
****************************************************************************** ;

* strata: one integer id per observed combination of wave, neighborhood, female and mfi.
* It is the absorbed fixed effect in the attrition regressions further down ;
local strata_vars wave neighborhood female mfi ;
egen strata = group(`strata_vars') ;

* waveclass: one integer id per wave-by-class cell.
* It is not referenced by the attrition table code in this file ;
egen waveclass = group(wave class) ;

* *************************************************************************** ;
* label variables for the table ; 
* *************************************************************************** ;

* Row labels for the regressors, written as LaTeX because esttab is called with the label option ;
label variable healthonly "HEE" ;
label variable healthandpay "HEEC" ;
label variable bl_health_index "Baseline Health Index" ;
label variable healthonlyXbl_health_index "HEE $\ast$ Baseline Health Index" ;
label variable healthandpayXbl_health_index "HEEC $\ast$ Baseline Health Index" ;

* Stacked three-line labels for the attrition outcomes.
* The two midline outcomes carry the same words and differ only in spacing inside the label text ;
label variable has_missing_midline "\shortstack[c]{Has\\missing \\ midline}" ;
label variable has_missing_midline_not_wave1 "\shortstack[c]{Has\\missing\\midline}" ;
label variable has_missing_endline "\shortstack[c]{Has\\missing\\endline}" ;

* *************************************************************************** ;
* regressions: differential attrition
* *************************************************************************** ;

* Start from an empty list of stored estimates ;
estimates drop _all ;

* Specification A: each attrition dummy on the two treatment dummies, absorbing strata.
* Results are stored as midlineA, midline_not_wave1A and endlineA ;
foreach survey in midline midline_not_wave1 endline { ;

	local depvar has_missing_`survey' ;
	local model `survey'A ;

	areg `depvar' healthonly healthandpay, absorb(strata) vce(robust) ;
	estimates store `model' ;

	* Joint F-test that both treatment coefficients are zero.
	* titlerow is an empty entry that only supplies a heading row in the table statistics ;
	test healthonly healthandpay ;
	estadd local titlerow = "" ;
	estadd scalar fstat = r(F): `model' ;
	estadd scalar pval = r(p): `model' ;

	* Control-group mean and standard deviation of the outcome within the estimation sample ;
	summarize `depvar' if control == 1 & e(sample) == 1 ;
	estadd scalar cmean = r(mean): `model' ;
	estadd scalar csd = r(sd): `model' ;

	* Record which sample the column was estimated on ;
	local sample_tag = cond("`survey'" == "midline_not_wave1", "Excl. Wave 1", "All Particip.") ;
	estadd local regsample = "`sample_tag'": `model' ;

} ;

* Specification B: adds the baseline health index and its interactions with both treatments.
* Results are stored as midlineB, midline_not_wave1B and endlineB ;
local rhs_B
	healthonly healthandpay bl_health_index
	healthonlyXbl_health_index healthandpayXbl_health_index ;

foreach survey in midline midline_not_wave1 endline { ;

	local depvar has_missing_`survey' ;
	local model `survey'B ;

	areg `depvar' `rhs_B', absorb(strata) vce(robust) ;
	estimates store `model' ;

	* Joint F-test that all five listed coefficients are zero.
	* titlerow is an empty entry that only supplies a heading row in the table statistics ;
	test `rhs_B' ;
	estadd local titlerow = "" ;
	estadd scalar fstat = r(F): `model' ;
	estadd scalar pval = r(p): `model' ;

	* Control-group mean and standard deviation of the outcome within the estimation sample ;
	summarize `depvar' if control == 1 & e(sample) == 1 ;
	estadd scalar cmean = r(mean): `model' ;
	estadd scalar csd = r(sd): `model' ;

	* Record which sample the column was estimated on ;
	local sample_tag = cond("`survey'" == "midline_not_wave1", "Excl. Wave 1", "All Particip.") ;
	estadd local regsample = "`sample_tag'": `model' ;

} ;
	
* Write the six stored attrition regressions to a LaTeX table, replacing any existing file ;
* Column order: midline A and B, midline excluding wave 1 A and B, endline A and B ;
* cells() prints coefficients with significance stars and standard errors in parentheses, and the constant is dropped ;
* stats() prints the F-test heading row, the F-statistic and p-value, the control mean and SD, and N ;
* The regsample local added to each model is not listed in stats(), so it does not appear in the table ;
* mgroups() places one spanning header over each pair of columns ;
* prehead() and postfoot() carry the LaTeX table wrapper, caption and notes, and backslash-dollar keeps Stata from reading the dollar sign as a global macro ;
* NOTE(review): the table note describes strata as gender, neighborhood and microfinance client status, while the strata variable is built from wave, neighborhood, female and mfi -- confirm the wording is intended ;
esttab midlineA midlineB midline_not_wave1A midline_not_wave1B endlineA endlineB using "./output/appendix-table-attrition-analysis.tex",
	replace
	drop(_cons) 
	cells(b(label() star fmt(%9.3f %9.3f)) se(par)) 
	star(* 0.10 ** 0.05 *** 0.01) 
	stats(titlerow fstat pval cmean csd N, fmt(%9s %9.3f %9.3f %9.3f %9.3f %9.0f) labels("\(F\)-test, all coefficients = 0" "$\qquad$ \(F\)-statistic" "$\qquad$ \(p\)-value" "Control Mean" "Control SD" "N"))
	prehead(\begin{table}[h!] \centering \small `"\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}"'
		\captionsetup{justification=centering} 
		\caption{Attrition Analysis}
		\label{attrition-analysis} 
		\begin{tabular*}{\hsize}{p{4.7cm}p{1.5cm}p{1.5cm}p{1.5cm}p{1.5cm}p{1.5cm}p{1.5cm}}
		\toprule)
	legend label  booktabs  collabels( , none)
	mgroups(
	"Full Sample" 
	"Excluding Wave 1"
	"Full Sample"
	, pattern(1 0 1 0 1 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span}))
	postfoot(`"\bottomrule"'  \end{tabular*} \captionsetup{justification=justified, width=\hsize} 
		\caption*{\footnotesize \textit{Notes:} 
		The dependent variables are dummies equal to one if the respondent has missing data in the given survey.
		\textit{Baseline Health Index} is the proportion of all five baseline health variables (reported in Table \ref{table-baseline-summstats}) that are equal to one for the respondent. The odd-numbered columns report the \(F\)-test for the joint significance of the coefficients on \textit{HEE} and \textit{HEEC}. The even-numbered columns report the \(F\)-test for the joint significance of the coefficients on \textit{HEE}, \textit{HEEC}, \textit{Baseline Health Index}, as well as the interactions \textit{HEE $\ast$ Baseline Health Index} and \textit{HEEC $\ast$ Baseline Health Index}.
		Robust standard errors.
		All regressions include dummies for randomization strata, where strata are defined by gender, neighborhood, and microfinance client status.
		***\$\,p < 0.01$, **\$\,p < 0.05$, *\$\,p<0.10$.
		}
		\end{table}) ;
		
	

